#Set working directory
# Every relative path used further down (e.g. "histone_read_counts",
# "h3k27ac/...") is resolved against this directory.
setwd("~/histone")
library("edgeR")
library("ggplot2")
library("data.table")
library("gridExtra")
library("gplots")
library("plyr")
library("stringr")
library("ggforce")
library("readr")
library("dplyr")
library("ggpubr")
library("tidyverse")
library('corrplot')
library('reshape2')
library("DESeq2")
library("RUVSeq")
library("EnhancedVolcano")
# ---- Read peak counts and build the DGEList object ----
# `pattern` is a regular expression, not a shell glob: the dots are escaped so
# they match literally and the name is anchored at the end of the file name.
files <- dir(path = "histone_read_counts",
             pattern = "h3k27ac\\.read\\.counts\\.txt$")
if (length(files) == 0) {
  stop("No '*h3k27ac.read.counts.txt' files found in 'histone_read_counts'",
       call. = FALSE)
}
# The count files are read without a header row.
counts <- readDGE(files, path = "histone_read_counts", header = FALSE)$counts

# Read sample information
samples <- read.csv("h3k27ac/samples.csv")
# Genotype labels are attached to the count columns by position, so the sample
# sheet needs exactly one row per count file.
# NOTE(review): the row order of samples.csv is not checked against `files`;
# confirm that both are in the same order.
stopifnot(nrow(samples) == ncol(counts))

dge <- DGEList(counts = counts,
               group = samples$genotype,
               genes = rownames(counts)
)
# Kept from the original script: store the genotype column from the sample
# sheet as the group (DGEList() above already received the same labels).
dge$samples$group <- samples$genotype

# Filter out the low-count peaks; library sizes are recomputed afterwards
# (keep.lib.sizes = FALSE).
keep <- filterByExpr(dge)
table(keep)
dge <- dge[keep, , keep.lib.sizes = FALSE]

# Normalize for count composition (calcNormFactors defaults)
dge <- calcNormFactors(dge)
saveRDS(dge, "h3k27ac/objs/dge_filt_norm.rds")

# Get the log2(cpm) table: peak identifier column followed by one column per
# sample.
# Note: the variable `cpm` has the same name as the cpm() function; later
# calls such as cpm(x) still resolve to the function.
cpm <- data.table(dge$genes, cpm(dge, log = TRUE))
write.csv(cpm, "h3k27ac/reports/cpm.csv", row.names = FALSE)

## Find DER by edgeR (before RUV correction)
# Design matrix by genotype: WT is the reference level, so coefficient 2 is
# the HT-vs-WT effect.
group <- factor(samples$genotype, levels = c("WT", "HT"))
design <- model.matrix(~group)

# Explore data: BCV plot (biological coefficient of variation)
dge <- estimateDisp(dge, design)
# The BCV is the square root of the dispersion, so the value reported here is
# the common BCV (not the common dispersion itself).
message("common BCV = ", sqrt(dge$common.dispersion))
pdf("h3k27ac/figs/Before_RUV_BCV.pdf")
plotBCV(dge)
dev.off()

# Genotype effect before removing unwanted variation: likelihood-ratio test
# on the genotype coefficient, all peaks reported, sorted by p-value.
fit <- glmFit(dge, design)
fit_de_geno <- glmLRT(fit, coef = 2)
de_geno <- as.data.frame(topTags(fit_de_geno, n = Inf, sort.by = "PValue"))
write.csv(de_geno, "h3k27ac/reports/de_h3k27ac_geno.csv")

# ---- Remove unwanted variation using residuals (RUVr) ----
# `fit` is the genotype-only GLM fitted above on the same `dge` and `design`;
# re-fitting with identical arguments would give the same object, so it is
# reused here.
res <- residuals(fit, type = "deviance")
# Upper-quartile normalization of the filtered counts, as input for RUVr.
countsUQ <- betweenLaneNormalization(dge$counts, which = "upper")
# Every filtered peak is used as a control feature.
controls <- rownames(dge$counts)
# Estimate k = 2 factors of unwanted variation from the residuals.
counts_RUVr <- RUVr(countsUQ, controls, k = 2, res)
counts_after_RUV <- counts_RUVr$normalizedCounts

# Build a DGEList from the RUVr-normalized counts
dge_RUVr <- DGEList(counts = counts_after_RUV,
                    group = samples$genotype,
                    genes = rownames(counts_after_RUV)
)
# Fixed copy-paste slip: the group belongs on the new object (`dge_RUVr`),
# not on the pre-RUV `dge`.
dge_RUVr$samples$group <- samples$genotype

# Get the log2(cpm) after RUVr
cpm_after_RUVr <- data.table(dge_RUVr$genes, cpm(dge_RUVr, log = TRUE))
# NOTE(review): this drops the third column of the table. With a single
# identifier column in front, that appears to be the second sample's log-CPM,
# and the pre-RUV cpm table has no equivalent step -- confirm this is intended.
cpm_after_RUVr <- cpm_after_RUVr[, -3, with = FALSE]
write.csv(cpm_after_RUVr, "h3k27ac/reports/cpm_after_RUVr.csv",
          row.names = FALSE)

# Explore data: BCV plot (biological coefficient of variation) after RUVr
dge_RUVr <- estimateDisp(dge_RUVr, design)

# The BCV is the square root of the dispersion, so the value reported here is
# the common BCV (not the common dispersion itself).
message("common BCV = ", sqrt(dge_RUVr$common.dispersion))
pdf("h3k27ac/figs/After_RUV_BCV.pdf")
plotBCV(dge_RUVr)
dev.off()

# Find DER after RUVr: likelihood-ratio test on the genotype coefficient
# (coef = 2), all peaks reported, sorted by p-value.
# NOTE(review): the test is run on the RUVr-normalized counts with the
# genotype-only design. The RUVSeq documentation recommends keeping the
# original counts and adding the estimated factors (W) to the design instead;
# confirm this choice is deliberate.
fit_RUVr <- glmFit(dge_RUVr, design)
fit_de_RUVr <- glmLRT(fit_RUVr, coef = 2)
de_RUVr <- as.data.frame(topTags(fit_de_RUVr, n = Inf, sort.by = "PValue"))
write.csv(de_RUVr, "h3k27ac/reports/de_h3k27ac_after_RUVr.csv")

## Volcano plot (Het vs WT, after RUVr)
# Per-point colours:
#   FDR <= 0.1 and logFC > 0  -> '#f89540' (upregulated)
#   FDR <= 0.1 and logFC < 0  -> '#7e03a8' (downregulated)
#   everything else, including NA FDR/logFC and logFC == 0
#                             -> '#fcffa4' (non-significant)
is_significant <- !is.na(de_RUVr$FDR) & de_RUVr$FDR <= 0.1
keyvals.colour1 <- rep("#fcffa4", nrow(de_RUVr))
# which() ignores NA comparisons, so rows with NA logFC keep the default colour.
keyvals.colour1[which(is_significant & de_RUVr$logFC > 0)] <- "#f89540"
keyvals.colour1[which(is_significant & de_RUVr$logFC < 0)] <- "#7e03a8"

# The names of the colour vector are used as legend labels.
colour_labels <- c("#f89540" = "upregulated",
                   "#fcffa4" = "non-significant",
                   "#7e03a8" = "downregulated")
names(keyvals.colour1) <- unname(colour_labels[keyvals.colour1])

# Build the plot once, then draw it on each output device.
# Points outside xlim/ylim are not shown.
volcano_plot <- EnhancedVolcano(de_RUVr,
                                lab = NA,
                                #selectLab = hits1$external.gene.name,
                                title = 'Het vs WT',
                                FCcutoff = 0,
                                pCutoff = 0.1,
                                pointSize = 2.0,
                                xlim = c(-4, 2),
                                ylim = c(0, 8),
                                #labSize = 2.0,
                                colAlpha = 0.8,
                                boxedLabels = TRUE,
                                x = 'logFC',
                                y = 'FDR',
                                gridlines.major = TRUE,
                                gridlines.minor = FALSE,
                                border = 'full',
                                borderWidth = 1.0,
                                borderColour = 'black',
                                legendPosition = 'top',
                                legendLabSize = 10,
                                legendIconSize = 5,
                                colCustom = keyvals.colour1
)

# Each device is opened, drawn to with an explicit print() (so the plot is
# rendered even when the script is run via source()), and closed before the
# next one is opened. Previously both devices were opened back to back with a
# single dev.off(), which left the PDF device open and the PDF without a plot.
pdf("h3k27ac/figs/Volcano plot_Het-WT_H3K27ac.pdf", width = 6, height = 8)
print(volcano_plot)
dev.off()

png("h3k27ac/figs/Volcano plot_Het-WT_H3K27ac.png", width = 600, height = 800)
print(volcano_plot)
dev.off()